
* File to import pollution data from EPA Air Quality System 

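***inputs:
* $OrigData/aqs_sites.csv
* $OrigData/daily_`poll'_`year'.csv, where `year' is 2013-2018 and `poll' is one of
*     42101 (CO), 42401 (SO2), 42602 (NO2), 44201 (ozone), 88101 (PM2.5)
* $OrigData/daily_88101_`year'.csv, where `year' is 2003-2012

***outputs:
* $Data/aqs_sites.dta
* $Data/aqs_sites_latlong.csv
* $Data/daily_88101_`year'.dta, where `year' is 2003-2012 (imported only; not appended or erased in this file)
* $Data/daily_PM2p5_1318.dta
* $Data/daily_ozone_1318.dta
* $Data/daily_so2_1318.dta
* $Data/daily_co_1318.dta
* $Data/daily_no2_1318.dta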





***1. IMPORT SITE DATA ***

* Purpose: convert the raw AQS monitor-site listing to Stata format, add a
* 3-digit ZIP prefix, and export site coordinates to CSV.

clear
* import raw data
insheet using "$OrigData/aqs_sites.csv"

* create 3 digit zip code
* floor() is required here: round() pushes every ZIP whose last two digits
* are 50-99 into the next prefix (e.g. 90250/100 = 902.5 would round to 903).
	gen zip = floor(zipcode/100)

* make statecode numeric; ignore("CC") strips the characters of the
* non-numeric code "CC", so those sites end up with a missing statecode
	destring statecode, ignore("CC") replace

save "$Data/aqs_sites.dta", replace

* --- export of site coordinates (does not alter the saved .dta above) ---

* drop sites closed before 1 Jan 2012; a missing site_close sorts above any
* date in Stata, so such sites are kept
* NOTE(review): assumes site_close is a numeric Stata date -- confirm
 drop if site_close< td(01jan2012)

 keep statecode countycode sitenumber latitude longitude

* keep only sites with both coordinates present
 keep if longitude<. & latitude<.

 export delimited using "$Data/aqs_sites_latlong.csv", replace





***2. IMPORT POLLUTION DATA ***

* Purpose: convert every raw daily AQS CSV to a .dta file of the same name in
* $Data. statecode is destringed for every file (previously only for the
* 2003-2012 PM2.5 files) so that it is numeric in all years and matches the
* numeric statecode in aqs_sites.dta. destring leaves an already-numeric
* variable untouched, so this is harmless where no string codes occur.

* import raw data by year: all five pollutants, 2013-2018
	forval year = 2013(1)2018 {

		foreach poll in 42101 42401 42602 44201 88101 {

			clear
			insheet using "$OrigData/daily_`poll'_`year'.csv"

			* ignore("CC") strips the non-numeric code "CC" (becomes missing)
			destring statecode, ignore("CC") replace

			save "$Data/daily_`poll'_`year'.dta", replace

		}

	}

* import raw data by year: PM2.5 (88101) only, 2003-2012
	forval year = 2003(1)2012 {

		foreach poll in 88101 {

			clear
			insheet using "$OrigData/daily_`poll'_`year'.csv"

			* ignore("CC") strips the non-numeric code "CC" (becomes missing)
			destring statecode, ignore("CC") replace

			save "$Data/daily_`poll'_`year'.dta", replace

		}

	}
			
			
******************************
********** PM2.5 *************
******************************

* Purpose: stack the 2013-2018 daily PM2.5 (parameter 88101) files and reduce
* them to one record per site and day.

local code    88101
local site_id statecode countycode sitenumber

* Start from 2013, then append 2014-2018, deleting each yearly file once read.
use $Data/daily_`code'_2013.dta, clear
forvalues yr = 2014/2018 {
	append using $Data/daily_`code'_`yr'.dta
	erase $Data/daily_`code'_`yr'.dta
}

* Harmonise identifiers with the site file and label the instrument id.
rename sitenum sitenumber
rename poc instrument
label variable instrument "Instrument Number"

* Discard hourly-duration records.
keep if sampleduration != "1 HOUR"

* Attach site information; site-only rows (no pollution record) are not kept.
* NOTE(review): zip is removed again by the keep below, so this merge does not
* change the saved output -- confirm whether zip was meant to be retained.
merge m:1 `site_id' using $Data/aqs_sites.dta, keepusing(zip) keep(master match) nogenerate

* Build a Stata daily date from the local-date string.
generate date = date(datelocal, "YMD")
format %td date

* Restrict to the variables needed and give the measures generic names.
keep arith* stmaxva aqi `site_id' date
rename arith mean_pollution
rename stmax max_pollution

* Remove negative readings (missing values compare as >= 0 and stay).
* NOTE(review): unlike the ozone/SO2/CO sections of this file, rows with
* eventtype "Excluded" are not dropped here -- confirm this is intended.
keep if mean_pollution >= 0

* One row per site-day: mean of the means, max of the maxima.
collapse (mean) mean_pollution (max) max_pollution, by(`site_id' date)

save $Data/daily_PM2p5_1318.dta, replace

* The 2013 intermediate file is no longer needed.
erase $Data/daily_`code'_2013.dta



		
******************************
********** OZONE *************
******************************

* Purpose: stack the 2013-2018 daily ozone (parameter 44201) files and reduce
* them to one record per site and day.

local code    44201
local site_id statecode countycode sitenumber

* Start from 2013, then append 2014-2018; every yearly file is deleted once read.
use $Data/daily_`code'_2013.dta, clear
forvalues yr = 2014/2018 {
	append using $Data/daily_`code'_`yr'.dta
	erase $Data/daily_`code'_`yr'.dta
}
erase $Data/daily_`code'_2013.dta

rename sitenum sitenumber

* Attach site information; site-only rows (no pollution record) are not kept.
* NOTE(review): zip does not survive the collapse below, so this merge does
* not change the saved output.
merge m:1 `site_id' using $Data/aqs_sites.dta, keepusing(zip) keep(master match) nogenerate

* Build a Stata daily date from the local-date string.
generate date = date(datelocal, "YMD")
format %td date

rename arith mean_pollution
rename stmaxvalue max_pollution

* Remove records flagged as excluded events and negative readings
* (missing values compare as >= 0 and stay).
keep if eventtype != "Excluded"
keep if mean_pollution >= 0

* One row per site-day: mean of the means, max of the maxima.
collapse (mean) mean_ozone = mean_pollution (max) max_ozone = max_pollution, by(`site_id' date)

save $Data/daily_ozone_1318.dta, replace








		
******************************
*********** SO2 **************
******************************

* Purpose: stack the 2013-2018 daily SO2 (parameter 42401) files and reduce
* them to one record per site and day, using 1-hour-duration records only.

local code    42401
local site_id statecode countycode sitenumber

* Start from 2013, then append 2014-2018; every yearly file is deleted once read.
use $Data/daily_`code'_2013.dta, clear
forvalues yr = 2014/2018 {
	append using $Data/daily_`code'_`yr'.dta
	erase $Data/daily_`code'_`yr'.dta
}
erase $Data/daily_`code'_2013.dta

rename sitenum sitenumber

* Attach site information; site-only rows (no pollution record) are not kept.
* NOTE(review): zip does not survive the collapse below, so this merge does
* not change the saved output.
merge m:1 `site_id' using $Data/aqs_sites.dta, keepusing(zip) keep(master match) nogenerate

* Build a Stata daily date from the local-date string.
generate date = date(datelocal, "YMD")
format %td date

* Remove excluded-event records, then retain only the 1-hour sample duration.
keep if eventtype != "Excluded"
drop if sampleduration != "1 HOUR"

rename arith mean_pollution
rename stmaxvalue max_pollution

* One row per site-day: mean of the means, max of the maxima.
collapse (mean) mean_so2 = mean_pollution (max) max_so2 = max_pollution, by(`site_id' date)

save $Data/daily_so2_1318.dta, replace








		
******************************
************ CO **************
******************************

* Purpose: stack the 2013-2018 daily CO (parameter 42101) files and reduce
* them to one record per site and day, using 1-hour-duration records only.

local code    42101
local site_id statecode countycode sitenumber

* Start from 2013, then append 2014-2018; every yearly file is deleted once read.
use $Data/daily_`code'_2013.dta, clear
forvalues yr = 2014/2018 {
	append using $Data/daily_`code'_`yr'.dta
	erase $Data/daily_`code'_`yr'.dta
}
erase $Data/daily_`code'_2013.dta

rename sitenum sitenumber

* Attach site information; site-only rows (no pollution record) are not kept.
* NOTE(review): zip does not survive the collapse below, so this merge does
* not change the saved output.
merge m:1 `site_id' using $Data/aqs_sites.dta, keepusing(zip) keep(master match) nogenerate

* Build a Stata daily date from the local-date string.
generate date = date(datelocal, "YMD")
format %td date

* Remove excluded-event records, then retain only the 1-hour sample duration.
keep if eventtype != "Excluded"
drop if sampleduration != "1 HOUR"

rename arith mean_pollution
rename stmaxvalue max_pollution

* One row per site-day: mean of the means, max of the maxima.
collapse (mean) mean_co = mean_pollution (max) max_co = max_pollution, by(`site_id' date)

save $Data/daily_co_1318.dta, replace






		
******************************
*********** NO2 **************
******************************

* Purpose: stack the 2013-2018 daily NO2 (parameter 42602) files and reduce
* them to one record per site and day.

local code    42602
local site_id statecode countycode sitenumber

* Start from 2013, then append 2014-2018; every yearly file is deleted once read.
use $Data/daily_`code'_2013.dta, clear
forvalues yr = 2014/2018 {
	append using $Data/daily_`code'_`yr'.dta
	erase $Data/daily_`code'_`yr'.dta
}
erase $Data/daily_`code'_2013.dta

rename sitenum sitenumber

* Attach site information; site-only rows (no pollution record) are not kept.
* NOTE(review): zip does not survive the collapse below, so this merge does
* not change the saved output.
merge m:1 `site_id' using $Data/aqs_sites.dta, keepusing(zip) keep(master match) nogenerate

* Build a Stata daily date from the local-date string.
generate date = date(datelocal, "YMD")
format %td date

* NOTE(review): no event-type, sample-duration or negative-value filter is
* applied in this section, unlike some other pollutant sections in this file
* -- confirm this is intended.
rename arith mean_pollution
rename stmaxvalue max_pollution

* One row per site-day: mean of the means, max of the maxima.
collapse (mean) mean_no2 = mean_pollution (max) max_no2 = max_pollution, by(`site_id' date)

save $Data/daily_no2_1318.dta, replace
